import { CheerioCrawler, Dataset } from "crawlee";

// CheerioCrawler crawls the web using HTTP requests
// and parses HTML using the Cheerio library.
// CheerioCrawler crawls the web using plain HTTP requests
// and parses the returned HTML with the Cheerio library.
const crawler = new CheerioCrawler({
  // requestHandler is invoked once per crawled page.
  // `$` is the Cheerio handle for the page's parsed HTML.
  async requestHandler({ request, $, enqueueLinks, log }) {
    const title = $("title").text();
    log.info(`Title of ${request.loadedUrl} is '${title}'`);

    // Save results as JSON to ./storage/datasets/default
    await Dataset.pushData({ title, url: request.loadedUrl });

    // Extract links from the current page
    // and add them to the crawling queue.
    await enqueueLinks();
  },

  // Let's limit our crawls to make our tests shorter and safer.
  maxRequestsPerCrawl: 50,
});

// Add the first URL to the queue and start the crawl.
// `run()` returns a Promise; await it (top-level await is valid in ES
// modules) so rejections surface instead of becoming unhandled.
await crawler.run(["https://crawlee.dev"]);